#https://blog.csdn.net/qq_38161040/article/details/88203864
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Directory listing whose links are printed as absolute URLs.
BASE_URL = 'https://archive.cloudera.com/cdh5/redhat/7/x86_64/cdh/5/RPMS/x86_64/'


def absolute_links(base_url, hrefs):
    """Resolve each href against *base_url* and return the absolute URLs.

    Args:
        base_url: URL of the page the hrefs were taken from.
        hrefs: Iterable of raw ``href`` values; items may be ``None`` or
            empty when an anchor carries no usable ``href`` attribute.

    Returns:
        A list of absolute URLs, in input order. ``None`` and empty hrefs
        are skipped instead of producing a bogus ``.../None`` entry.
    """
    # urljoin (rather than plain concatenation) keeps already-absolute
    # hrefs intact and resolves relative ones such as '../' correctly.
    return [urljoin(base_url, href) for href in hrefs if href]


def main():
    """Fetch the listing at BASE_URL and print every link it contains.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
    """
    # A timeout prevents the script from hanging forever on a stalled server.
    response = requests.get(BASE_URL, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    hrefs = (tag.get('href') for tag in soup.find_all('a'))
    for link in absolute_links(BASE_URL, hrefs):
        print(link)


if __name__ == '__main__':
    main()